# -*- coding:utf-8 -*-

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from urllib.request import urlretrieve
import requests


def getEncoding(html):
    """Return the best-guess text encoding for a response-like object.

    Args:
        html: An object exposing ``encoding``, ``text`` and
            ``apparent_encoding`` attributes (presumably a
            ``requests.Response`` -- verify against callers).

    Returns:
        ``html.encoding`` unless it is ``'ISO-8859-1'``. In that case the
        first encoding declared inside the document body is returned, or
        ``html.apparent_encoding`` when the body declares none.
    """
    # Any encoding other than ISO-8859-1 is trusted as-is. Previously this
    # path fell through to ``return encoding`` with the name unbound and
    # raised UnboundLocalError.
    if html.encoding != 'ISO-8859-1':
        return html.encoding

    # Look for a charset declared in the content itself before falling back
    # to the detected (apparent) encoding.
    encodings = requests.utils.get_encodings_from_content(html.text)
    if encodings:
        return encodings[0]
    return html.apparent_encoding


def getHtmlText(url):
    """Load *url* in headless Chrome and return the rendered page source.

    Args:
        url: Absolute URL of the page to load.

    Returns:
        The value of ``driver.page_source`` after the page has been loaded
        and a one-second pause has elapsed.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword, which
    # newer Selenium releases no longer accept.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        time.sleep(1)
        driver.get(url)
        # Fixed pause after navigation, presumably to let scripts populate
        # the page before it is read -- TODO confirm it is long enough.
        time.sleep(1)
        return driver.page_source
    finally:
        # quit() ends the whole browser session (close() only closes the
        # current window), and the finally clause guarantees cleanup even
        # when navigation raises.
        driver.quit()


def findTargetList(text):
    """Download every comic image on a page and return the next-page link.

    Images are taken from ``img.comic-pic`` tags inside the first
    ``ul.comic-pic-list`` element and saved as ``<num>.jpg``, where ``num``
    is the module-level counter; the counter is advanced once per saved
    image.

    Args:
        text: HTML source of the page.

    Returns:
        The ``href`` of the ``a.nav-ctrl-btn.next`` link, or ``None`` when
        the page has no such link (or the link has no ``href``).
    """
    global num
    soup = BeautifulSoup(text, 'html.parser')

    # A page without a "next" button yields None here instead of raising
    # AttributeError, so the caller can stop cleanly.
    next_link = soup.find('a', attrs={'class': 'nav-ctrl-btn next'})
    url_next = next_link.get('href') if next_link is not None else None

    # Search inside the list tag directly; re-parsing its string form is
    # unnecessary. A missing list simply means there is nothing to download.
    pic_list = soup.find('ul', attrs={'class': 'comic-pic-list'})
    if pic_list is not None:
        images = pic_list.find_all('img', attrs={'class': 'comic-pic'})
    else:
        images = []

    for img in images:
        img_url = img.get('data-src')
        if not img_url:
            # Skip images that carry no data-src instead of aborting the page.
            continue
        print(img_url)
        urlretrieve(img_url, '/Users/god/Documents/img/%s.jpg' % num)
        num = num + 1
    print('url_next:', url_next)
    print('num:', num)
    return url_next


def getAllImgList(host, url):
    """Walk the chain of pages starting at ``host + url``.

    Each page is fetched with ``getHtmlText`` and handed to
    ``findTargetList``, whose return value is used as the path of the next
    page. The walk stops when that value is ``None`` or blank.

    Args:
        host: Prefix prepended to every page path.
        url: Path of the first page.

    Returns:
        Always ``None``; all work happens inside ``findTargetList``.
    """
    # Iterate rather than recurse so the number of pages that can be walked
    # is not bounded by the interpreter's recursion limit.
    while True:
        text = getHtmlText(host + url)
        url = findTargetList(text)
        if url is None or not url.strip():
            return None


# def xxx(n):
#     result = ""
#     for i in range(2, n+1):
#         if n<i:
#             break
#         value, delta, count = n//i, n%i, 0
#         while delta == 0 and value != 1:
#             n, count = value, count+1
#             delta = n%i
#             if delta == 0:
#                 value = n//i
#         if value == 1 and delta == 0:
#             count = count + 1
#         if count != 0:
#             if len(result) != 0:
#                 result += "*"
#                 # print('*', end="")
#             result += str(i) + ("^" + str(count) if count > 1 else "")
#             # print(str(i) + ("^" + str(count) if count > 1 else ""), end="")
#     return result


# Running index used to name saved image files; findTargetList advances it
# after every download.
num = 17379


if __name__ == '__main__':
    host = "http://m.ac.qq.com"
    url = "/chapter/index/id/505430/cid/869"
    # getAllImgList returns None (images are saved while the pages are being
    # walked), so fall back to an empty list; iterating or measuring None
    # would raise TypeError once the crawl has finished.
    imglist = getAllImgList(host, url) or []
    for imgurl in imglist:
        urlretrieve(imgurl, '/Users/god/Documents/img/%s.jpg' % num)
        num = num + 1
    print(len(imglist))
    print('ending')

    # print(xxx(80))



